# ---------------------------------------------------------------- #
# The Journal of Politics
# When Do Citizens Resist The Use of AI Algorithms in Public Policy?
#
# Replication Code for Data Preparation and Variable Generation
# Author: Shir Raviv
# Date: Feb 17, 2025
#
# This R script loads the raw survey data (rawdata.RDS), creates key variables, and 
# generates the following data-sets: 
#  DT_wide_data.RDS – A wide-format dataset with newly generated variables for the Decision-Type Experiment.
#  DT_long_data.RDS – A long-format version of the Decision-Type Experiment data, including both between-subject and within-subject components.
#  DM_wide_data.RDS – The processed data for the Decision Maker Experiment.
#
# ---------------------------------------------------------------- #
# Clear environment and free memory ---------
# Drops every object from the current environment, then runs the garbage
# collector, so the script starts from an empty workspace.
rm(list = ls())
gc()

# Load required packages --------
# Each package is attached with require(); if that fails it is installed
# (with dependencies) and then attached with library().
required_packages <- c("dplyr", "readr", "expss", "rio", "tidyr")
for (pkg in required_packages) {
  if (require(pkg, character.only = TRUE)) {
    next
  }
  install.packages(pkg, dependencies = TRUE)
  library(pkg, character.only = TRUE)
}

# Set the working directory and define export paths ---------
# rawdata.RDS is read from the working directory; uncomment and edit the
# line below to point at the replication folder.
#setwd("insert here.. /JOP Replication")
plotpath <- "figures/"

# Load Survey Data ---------
df <- readRDS("rawdata.RDS")

# Generate Demographics Variables for both experiments---------

# Gender, birth year and age group, derived in one pass:
#   female      0 for gender codes 1 and 3, 1 for gender code 2, NA otherwise.
#               NOTE(review): code 3 is grouped with code 1 under the "Male"
#               label - confirm against the codebook.
#   female_fac  factor version of `female` ("Male" / "Female").
#   birthyear   raw `birthday` value shifted by 17.
#   age_cat     the six `age` codes labelled with their age brackets.
df <- mutate(
  df,
  female = case_when(
    gender == 2 ~ 1,
    gender %in% c(1, 3) ~ 0
  ),
  female_fac = factor(
    female,
    levels = c(0, 1),
    labels = c("Male", "Female")
  ),
  birthyear = birthday + 17,
  age_cat = factor(
    age,
    levels = 1:6,
    labels = c("18 - 24", "25 - 34", "35 - 44", "45 - 54", "55 - 64", "65 +")
  )
)

# Education:
# All education variables are built from the six-level `education` code
# (1 = Less than high school ... 6 = Graduate degree):
#   education_cat           labelled factor of the raw code.
#   high_edu_ba(_cat)       1 for codes 5-6 (Bachelor's or Graduate degree),
#                           0 for codes below 5.
#   low_edu_somecol(_cat)   1 for codes below 4 (Some college or less),
#                           0 for codes 4 and above.
#   low_edu_highschool(_cat) 1 for codes below 3 (High school or less),
#                           0 for codes 3 and above.
# Rows with a missing education code stay NA in every indicator.
df <- mutate(
  df,
  education_cat = factor(
    education,
    levels = 1:6,
    labels = c(
      "Less than high school",
      "High school",
      "Some college",
      "Associate’s degree",
      "Bachelor’s degree",
      "Graduate degree"
    )
  ),
  high_edu_ba = case_when(
    education < 5 ~ 0,
    education %in% c(5, 6) ~ 1
  ),
  high_edu_ba_cat = factor(
    high_edu_ba,
    levels = c(0, 1),
    labels = c("Below Bachelor's Degree", "Bachelor's or Graduate Degree")
  ),
  low_edu_somecol = case_when(
    education >= 4 ~ 0,
    education < 4 ~ 1
  ),
  low_edu_somecol_cat = factor(
    low_edu_somecol,
    levels = c(0, 1),
    labels = c("Associate’s Degree or Higher", "Some College or Less")
  ),
  low_edu_highschool = case_when(
    education >= 3 ~ 0,
    education < 3 ~ 1
  ),
  low_edu_highschool_cat = factor(
    low_edu_highschool,
    levels = c(0, 1),
    labels = c("Some College or More", "High School or Less")
  )
)

# Race:
#   race_cat  labelled factor of the seven `race` codes.
#   white     "White" when race is code 6, "Non White" otherwise.
#   black     "Black" when race is code 2, "Non Black" otherwise.
# Because `%in%` never returns NA, respondents with a missing race code fall
# into the "Non White" / "Non Black" categories.
race_labels_df <- NULL
df <- df %>%
  mutate(
    race_cat = factor(
      race,
      levels = 1:7,
      labels = c(
        "Asian",
        "Black/African",
        "Hispanic/Latino",
        "Middle Eastern",
        "North African",
        "White",
        "Other"
      )
    ),
    white = factor(
      if_else(race %in% 6, 1, 0),
      levels = c(0, 1),
      labels = c("Non White", "White")
    ),
    black = factor(
      if_else(race %in% 2, 1, 0),
      levels = c(0, 1),
      labels = c("Non Black", "Black")
    )
  )
rm(race_labels_df)


# Generate Party Identification Variables ---------

# Dummies for the first three `pid` codes plus a three-category summary:
#   democrat_pid / republican_pid / independent_pid  1 when pid is 1 / 2 / 3,
#       0 for any other non-missing code, NA when pid is missing.
#   pid3      1 = Democrat (pid 1), 2 = Independent (pid 3 or 4),
#             3 = Republican (pid 2); NA for anything else.
#   pid3_fac  labelled factor version of pid3.
df <- mutate(
  df,
  democrat_pid = if_else(pid == 1, true = 1, false = 0),
  republican_pid = if_else(pid == 2, true = 1, false = 0),
  independent_pid = if_else(pid == 3, true = 1, false = 0),
  pid3 = case_when(
    pid == 1 ~ 1,
    pid %in% c(3, 4) ~ 2,
    pid == 2 ~ 3
  ),
  pid3_fac = factor(
    pid3,
    levels = 1:3,
    labels = c("Democrat", "Independent", "Republican")
  )
)


# Generate Digital Literacy Variables ----------

# Step 1: for each of the seven digital-literacy items create a
#         high_<item> indicator (1 when the score is greater than 3, 0
#         otherwise, NA when the score is missing).
# Step 2: sum_high_diglit adds up the seven indicators row by row.
# Step 3: high_diglit is "High Digital Literacy" when that sum is 6 or 7.
df <- df %>%
  mutate(
    across(
      c(
        diglit_Phishing,
        diglit_JPG,
        diglit_Cach,
        diglit_PDF,
        diglit_Tagging,
        diglit_RSS,
        diglit_cookie
      ),
      ~ if_else(.x > 3, 1, 0),
      .names = "high_{.col}"
    ),
    sum_high_diglit = expss::sum_row(
      high_diglit_Phishing,
      high_diglit_JPG,
      high_diglit_Cach,
      high_diglit_PDF,
      high_diglit_Tagging,
      high_diglit_RSS,
      high_diglit_cookie
    ),
    high_diglit = factor(
      if_else(sum_high_diglit >= 6, 1, 0),
      levels = c(0, 1),
      labels = c("Low Digital Literacy", "High Digital Literacy")
    )
  )

# Generate Trust and Prior Knowledge Variables ------------

#   hightrust_driveless  1 when automation_trust_bhv equals 1, 0 for any
#                        other non-missing value (trust in automated driving).
#   AI_have_priorknow    "High Prior Knowledge" when AI_priorknow is below 3
#                        (heard "a great deal" or "a lot" about AI),
#                        "Low Prior Knowledge" otherwise.
df <- mutate(
  df,
  hightrust_driveless = if_else(automation_trust_bhv == 1, 1, 0),
  AI_have_priorknow = factor(
    if_else(AI_priorknow < 3, 1, 0),
    levels = c(0, 1),
    labels = c("Low Prior Knowledge", "High Prior Knowledge")
  )
)




### Preparing data for the Decision-Type Experiment  ------ # ------
# Decision-Type Experiment: Generating key outcome variables  ---------------

# Dichotomise a rating vector: values from 5 to 7 (inclusive) become 1, any
# other non-missing value becomes 0, and missing values stay NA.
convert_to_binary <- function(x) {
  missing_value <- is.na(x)
  in_top_range <- !(x < 5 | x > 7)
  ifelse(missing_value, NA, ifelse(in_top_range, 1, 0))
}

# Binary versions of the seven appropriateness items: each bin_app_mech_*
# column is convert_to_binary() applied to the matching DTexp_app_mech_*
# column.
df <- df %>%
  mutate(
    bin_app_mech_edu = convert_to_binary(DTexp_app_mech_edu),
    bin_app_mech_poli = convert_to_binary(DTexp_app_mech_poli),
    bin_app_mech_child = convert_to_binary(DTexp_app_mech_child),
    bin_app_mech_assistind = convert_to_binary(DTexp_app_mech_assistind),
    bin_app_mech_assistcol = convert_to_binary(DTexp_app_mech_assistcol),
    bin_app_mech_sancind = convert_to_binary(DTexp_app_mech_sancind),
    bin_app_mech_sanccol = convert_to_binary(DTexp_app_mech_sanccol)
  )

# Decision-Type Experiment: Generating key treatment variables ---------------
# For each policy domain (edu, child, poli) the assigned *_decision_type
# string is split into two 0/1 indicators:
#   <domain>_individuals  1 = decision targets individuals, 0 = collectives.
#   <domain>_assisting    1 = assisting decision, 0 = sanctioning decision.
# Any value other than the four listed strings (including NA) yields NA.
df <- df %>%
  mutate(
    edu_individuals = case_when(
      edu_decision_type %in% c("Sanctioning Collectives", "Assisting Collectives") ~ 0,
      edu_decision_type %in% c("Sanctioning Individuals", "Assisting Individuals") ~ 1
    ),
    edu_assisting = case_when(
      edu_decision_type %in% c("Sanctioning Collectives", "Sanctioning Individuals") ~ 0,
      edu_decision_type %in% c("Assisting Collectives", "Assisting Individuals") ~ 1
    ),
    child_individuals = case_when(
      child_decision_type %in% c("Sanctioning Collectives", "Assisting Collectives") ~ 0,
      child_decision_type %in% c("Sanctioning Individuals", "Assisting Individuals") ~ 1
    ),
    child_assisting = case_when(
      child_decision_type %in% c("Sanctioning Collectives", "Sanctioning Individuals") ~ 0,
      child_decision_type %in% c("Assisting Collectives", "Assisting Individuals") ~ 1
    ),
    poli_individuals = case_when(
      poli_decision_type %in% c("Sanctioning Collectives", "Assisting Collectives") ~ 0,
      poli_decision_type %in% c("Sanctioning Individuals", "Assisting Individuals") ~ 1
    ),
    poli_assisting = case_when(
      poli_decision_type %in% c("Sanctioning Collectives", "Sanctioning Individuals") ~ 0,
      poli_decision_type %in% c("Assisting Collectives", "Assisting Individuals") ~ 1
    )
  )


# Decision-Type Experiment: Save wide data (DT_wide_data.RDS) -----------
# Written to the current working directory.
saveRDS(object = df, file = "DT_wide_data.RDS")


# Decision-Type Experiment: Create long version of the data  (including both between and within subjects components) ------ # ------
# Subset used for the multi-level analysis of the decision-type experiment.
## Keep the respondent id and the block of columns from edu_decision_type to
## poli_assisting, then prefix the appropriateness outcome columns
## (DTexp_app_mech_edu through DTexp_app_mech_sanccol) with "outcome_".
# NOTE(review): df_multi is not referenced again in the visible part of this
# script - confirm whether it is still needed.
df_multi <- rename_with(
  dplyr::select(df, ResponseId, edu_decision_type:poli_assisting),
  function(col_name) paste0("outcome_", col_name),
  DTexp_app_mech_edu:DTexp_app_mech_sanccol
)

## Convert to long format
# Reshape one family of wide outcome columns into long format and attach the
# recoded treatment variables.
#
# Arguments:
#   data               Wide data frame. Besides the outcome columns it must
#                      contain edu/poli/child_decision_type and
#                      assist_ind/assist_col/sanc_ind/sanc_col_decision_type.
#   prefix             Common name prefix of the outcome columns, e.g.
#                      "DTexp_app_mech_". The item suffixes edu, poli, child,
#                      assistind, assistcol, sancind and sanccol are recognised.
#   outcome_var_label  Name given to the long-format value column.
#
# Returns the long data (rows with a missing outcome are dropped) with the
# added columns `item`, <outcome_var_label>, T_decisiontype (factor),
# T_type_assisting and T_type_individuals (integer 0/1/NA), T_domain and
# T_decision (character).
prepare_long_data <- function(data, prefix, outcome_var_label) {
  # Full name of an outcome column once "outcome_" has been prepended.
  item_name <- function(suffix) paste0("outcome_", prefix, suffix)

  # Translate a between-subject *_decision_type string into the decision-type
  # label used in the long data; unknown or missing strings give NA.
  type_label <- function(decision_type) {
    case_when(
      decision_type == "Assisting Collectives" ~ "assist collectives",
      decision_type == "Assisting Individuals" ~ "assist individuals",
      decision_type == "Sanctioning Collectives" ~ "sanction collectives",
      decision_type == "Sanctioning Individuals" ~ "sanction individuals",
      TRUE ~ NA_character_
    )
  }

  renamed <- rename_with(
    data,
    function(col) paste0("outcome_", col),
    .cols = starts_with(prefix)
  )
  stacked <- pivot_longer(
    renamed,
    cols = starts_with(item_name("")),
    names_to = "item",
    values_to = outcome_var_label,
    values_drop_na = TRUE
  )

  mutate(
    stacked,
    # Within-subject items carry their decision type in the item name;
    # between-subject items take it from the respondent's assigned condition.
    T_decisiontype = case_when(
      item == item_name("assistcol") ~ "assist collectives",
      item == item_name("assistind") ~ "assist individuals",
      item == item_name("sanccol") ~ "sanction collectives",
      item == item_name("sancind") ~ "sanction individuals",
      item == item_name("edu") ~ type_label(edu_decision_type),
      item == item_name("poli") ~ type_label(poli_decision_type),
      item == item_name("child") ~ type_label(child_decision_type),
      TRUE ~ NA_character_
    ),
    T_decisiontype = factor(
      T_decisiontype,
      levels = c("assist collectives", "assist individuals",
                 "sanction collectives", "sanction individuals")
    ),
    T_type_assisting = case_when(
      T_decisiontype %in% c("assist collectives", "assist individuals") ~ 1L,
      T_decisiontype %in% c("sanction collectives", "sanction individuals") ~ 0L,
      TRUE ~ NA_integer_
    ),
    T_type_individuals = case_when(
      T_decisiontype %in% c("assist individuals", "sanction individuals") ~ 1L,
      T_decisiontype %in% c("assist collectives", "sanction collectives") ~ 0L,
      TRUE ~ NA_integer_
    ),
    # Policy domain: fixed for the three between-subject items, taken from the
    # randomly drawn scenario for the four within-subject items.
    T_domain = case_when(
      item == item_name("edu") ~ "Education",
      item == item_name("child") ~ "Child welfare",
      item == item_name("poli") ~ "Policing",
      item == item_name("assistind") & assist_ind_decision_type == "Food stamps" ~ "Food stamps",
      item == item_name("assistind") & assist_ind_decision_type == "Study assistance" ~ "Study assistance",
      item == item_name("assistcol") & assist_col_decision_type == "Shelters for homeless" ~ "Homeless Shelters",
      item == item_name("assistcol") & assist_col_decision_type == "Fire stations" ~ "Fire stations",
      item == item_name("sancind") & sanc_ind_decision_type == "Sentence" ~ "Criminal Sentencing",
      item == item_name("sancind") & sanc_ind_decision_type == "Restraining order" ~ "Restraining order",
      item == item_name("sanccol") & sanc_col_decision_type == "Illegal building" ~ "Construction",
      item == item_name("sanccol") & sanc_col_decision_type == "Illegal work" ~ "Immigration",
      TRUE ~ NA_character_
    ),
    # Specific decision shown. The label strings are stored in the output and
    # are therefore kept exactly as they are spelled here.
    T_decision = case_when(
      item == item_name("edu") & edu_decision_type == "Sanctioning Individuals" ~ "Fire teachers",
      item == item_name("edu") & edu_decision_type == "Assisting Individuals" ~ "Promote teachers",
      item == item_name("edu") & edu_decision_type == "Assisting Collectives" ~ "Extra funding for scholls",
      item == item_name("edu") & edu_decision_type == "Sanctioning Collectives" ~ "Extra drag tests for scholls",
      item == item_name("child") & child_decision_type == "Sanctioning Individuals" ~ "Family invistigation",
      item == item_name("child") & child_decision_type == "Assisting Individuals" ~ "Family Coaching",
      item == item_name("child") & child_decision_type == "Assisting Collectives" ~ "Welfare centers",
      item == item_name("child") & child_decision_type == "Sanctioning Collectives" ~ "Welfare invistigation",
      item == item_name("poli") & poli_decision_type == "Sanctioning Individuals" ~ "Police monitoring",
      item == item_name("poli") & poli_decision_type == "Assisting Individuals" ~ "Mental assistance",
      item == item_name("poli") & poli_decision_type == "Assisting Collectives" ~ "Streetlights",
      item == item_name("poli") & poli_decision_type == "Sanctioning Collectives" ~ "Police enforcement",
      # A between-subject item whose condition matched none of the arms above
      # has no decision label.
      item %in% c(item_name("edu"), item_name("child"), item_name("poli")) ~ NA_character_,
      # For every other item the decision label equals the domain label
      # computed just above (NA when no scenario matched).
      TRUE ~ T_domain
    )
  )
}
# Prepare long-format datasets for each outcome type:
# 1. Appropriateness (keeps every column; serves as the base table)
long_app <- df %>%
  prepare_long_data("DTexp_app_mech_", "outcome_approp")

# 2. Fairness (only the join keys and the outcome are kept)
long_fair <- df %>%
  prepare_long_data("DTexp_fair_mech_", "outcome_fair") %>%
  dplyr::select(ResponseId, T_decision, outcome_fair)

# 3. Accuracy (only the join keys and the outcome are kept)
long_accur <- df %>%
  prepare_long_data("DTexp_accur_mech_", "outcome_accur") %>%
  dplyr::select(ResponseId, T_decision, outcome_accur)

# Combine together (3 outcomes): fairness and accuracy are matched onto the
# appropriateness rows by respondent and decision.
combined_data <- long_app %>%
  left_join(long_fair, by = c("ResponseId", "T_decision"))
combined_df <- combined_data %>%
  left_join(long_accur, by = c("ResponseId", "T_decision"))



# Flag which part of the rating matrix each row belongs to:
#   "Between"  the three between-subject items (Education, Policing,
#              Child welfare domains);
#   "Within"   every other row, i.e. the four within-subject decision-type
#              items as well as the attention check item.
combined_df <- combined_df %>%
  mutate(
    component_matrix = factor(
      if_else(T_domain %in% c("Education", "Policing", "Child welfare"), 0, 1),
      levels = c(0, 1),
      labels = c("Between", "Within")
    )
  )


# Create ordering variables
#   BWorder  order variable of the row's domain; filled only for "Between"
#            rows (NA elsewhere).
#   WTorder  order variable of the row's decision type; filled only for
#            "Within" rows (NA elsewhere).
combined_df <- combined_df %>%
  mutate(
    BWorder = case_when(
      !(component_matrix %in% "Between") ~ NA_real_,
      T_domain == "Education" ~ DTexp_order_education,
      T_domain == "Policing" ~ DTexp_order_policing,
      T_domain == "Child welfare" ~ DTexp_order_child,
      TRUE ~ NA_real_
    ),
    WTorder = case_when(
      !(component_matrix %in% "Within") ~ NA_real_,
      T_decisiontype == "sanction collectives" ~ DTexp_order_SancCol,
      T_decisiontype == "assist collectives" ~ DTexp_order_AssistCol,
      T_decisiontype == "assist individuals" ~ DTexp_order_AssistInd,
      T_decisiontype == "sanction individuals" ~ DTexp_order_SancInd,
      TRUE ~ NA_real_
    )
  )

# Create binary outcomes
# For each of the three outcomes two integer indicators are added:
#   <outcome>_bin3  1 when the rating is above 4, 0 otherwise;
#   <outcome>_bin2  1 when the rating is above 5, 0 otherwise.
# Missing ratings stay NA.
combined_df <- combined_df %>%
  mutate(
    across(
      c(outcome_approp, outcome_fair, outcome_accur),
      list(
        bin3 = ~ if_else(.x > 4, 1L, 0L),
        bin2 = ~ if_else(.x > 5, 1L, 0L)
      ),
      .names = "{.col}_{.fn}"
    )
  )

# Keep the analysis columns, in this order: respondent id, covariates,
# treatment indicators, scenario/condition columns, ordering variables,
# matrix component, raw outcomes and binary outcomes.
DT_long_data <- dplyr::select(
  combined_df,
  ResponseId,
  female_fac, birthyear, low_edu_somecol_cat, white, high_diglit,
  AI_have_priorknow, inattentive,
  T_type_assisting:T_type_individuals, T_decisiontype, T_decision,
  assist_ind_decision_type:sanc_col_decision_type,
  T_domain,
  edu_decision_type:child_decision_type,
  BWorder, WTorder, component_matrix,
  outcome_approp, outcome_fair, outcome_accur,
  outcome_approp_bin3:outcome_accur_bin2
)

# Decision-Type Experiment: Save dataset (DT_long_data.RDS) -----------
# Written to the current working directory.
saveRDS(object = DT_long_data, file = "DT_long_data.RDS")


### Prepare data for the Decision Maker Experiment --------- 
# Keep the respondent id, survey duration, the generated covariates (female
# through AI_have_priorknow), the attention flag and the block of
# Decision-Maker columns from DMexp_treatment_homless to
# DMexp_order_polic_first (that last column must already exist in df).
#
# Decision-Maker Experiment: Generate variables order (vignette presented first)  --------- 
# Each DMexp_order_*_first flag is 1 when the vignette's order value is 1, or
# when it is 2 and none of the other three vignettes has order value 1.
# In every other case - including when the second clause cannot be evaluated
# because an order value is missing - the flag is NA.
DM_df <- df %>%
  dplyr::select(
    ResponseId, Duration__in_seconds_,
    female:AI_have_priorknow, inattentive,
    DMexp_treatment_homless:DMexp_order_polic_first
  ) %>%
  mutate(
    DMexp_order_homless_first = case_when(
      DMexp_order_homless == 1 |
        (DMexp_order_homless == 2 &
           DMexp_order_child != 1 &
           DMexp_order_edu != 1 &
           DMexp_order_polic != 1) ~ 1
    ),
    DMexp_order_child_first = case_when(
      DMexp_order_child == 1 |
        (DMexp_order_child == 2 &
           DMexp_order_homless != 1 &
           DMexp_order_edu != 1 &
           DMexp_order_polic != 1) ~ 1
    ),
    DMexp_order_educ_first = case_when(
      DMexp_order_edu == 1 |
        (DMexp_order_edu == 2 &
           DMexp_order_homless != 1 &
           DMexp_order_child != 1 &
           DMexp_order_polic != 1) ~ 1
    ),
    DMexp_order_polic_first = case_when(
      DMexp_order_polic == 1 |
        (DMexp_order_polic == 2 &
           DMexp_order_homless != 1 &
           DMexp_order_child != 1 &
           DMexp_order_edu != 1) ~ 1
    )
  )


# Decision-Maker Experiment: Generate binary outcome variables --------
# For each vignette the five-point outcome is collapsed to 0/1:
# answers 1-2 become 1, answers 3-5 become 0, anything else stays NA.
DM_df <- DM_df %>%
  mutate(
    DMexp_bin_outcome_homless = case_when(
      DMexp_outcome_homless %in% c(1, 2) ~ 1,
      DMexp_outcome_homless %in% c(3, 4, 5) ~ 0
    ),
    DMexp_bin_outcome_child = case_when(
      DMexp_outcome_child %in% c(1, 2) ~ 1,
      DMexp_outcome_child %in% c(3, 4, 5) ~ 0
    ),
    DMexp_bin_outcome_educ = case_when(
      DMexp_outcome_educ %in% c(1, 2) ~ 1,
      DMexp_outcome_educ %in% c(3, 4, 5) ~ 0
    ),
    DMexp_bin_outcome_polic = case_when(
      DMexp_outcome_polic %in% c(1, 2) ~ 1,
      DMexp_outcome_polic %in% c(3, 4, 5) ~ 0
    )
  )


# Store the attention flag as a factor.
DM_df$inattentive <- factor(DM_df$inattentive)

# Decision-Maker Experiment: Save dataset (DM_wide_data.RDS) -----------
# The file is written to the current working directory, the same location
# used for DT_wide_data.RDS and DT_long_data.RDS. A hard-coded setwd() to the
# author's personal Dropbox folder used to precede this call; it made the
# script stop with an error on any machine without that folder and would have
# placed this file in a different directory from the other two outputs.
saveRDS(object = DM_df, file = "DM_wide_data.RDS")
